<HTML>
	<HEAD>
	<META http-equiv="PICS-Label" content='(PICS-1.0 "http://www.classify.org/safesurf/" l on "1996.08.22T13:44+0000"
	r (SS~~000 1))'>
	<SCRIPT language="JavaScript">
	<!-- hide this script tag's contents from old browsers

	// Absolute value of x; shorthand for Math.abs.
	function Abs(x) {
		var magnitude = Math.abs(x);
		return magnitude;
	}
	// Square root of x; shorthand for Math.sqrt.
	function Sqrt(x) {
		var root = Math.sqrt(x);
		return root;
	}
	// e raised to the power x; shorthand for Math.exp.
	function Exp(x) {
		var result = Math.exp(x);
		return result;
	}
	// Natural logarithm of x; shorthand for Math.log.
	function Ln(x) {
		var result = Math.log(x);
		return result;
	}
	// x raised to the power n; shorthand for Math.pow.
	function Power(x,n) {
		var result = Math.pow(x, n);
		return result;
	}

	// Pi and Pi/2 as plain numeric constants.
	var Pi = Math.PI;
	var PiD2 = Math.PI / 2;

	// Upper-tail probability of the chi-square distribution: the chance that a
	// chi-square variate with n degrees of freedom is at least x.
	//   x - chi-square statistic
	//   n - degrees of freedom
	// Returns a probability (1 when x is zero or negative).
	function ChiSq(x,n) {
		// The whole distribution lies above a non-positive statistic.
		if (x <= 0) { return 1; }

		// Very large x or n: use the cube-root (Wilson-Hilferty) normal
		// approximation instead of the series. Norm() is two-tailed, hence the /2.
		if (x > 1000 || n > 1000) {
			var q = Norm((Math.pow(x / n, 1 / 3) + 2 / (9 * n) - 1) / Math.sqrt(2 / (9 * n))) / 2;
			if (x > n) { return q; } else { return 1 - q; }
		}

		// Leading term; odd degrees of freedom carry an extra sqrt(2x/pi) factor.
		var p = Math.exp(-0.5 * x);
		if ((n % 2) == 1) { p = p * Math.sqrt(2 * x / Math.PI); }

		// Multiply in x/k for k = n, n-2, ... down to 2 (or 3 for odd n).
		var k = n;
		while (k >= 2) { p = p * x / k; k = k - 2; }

		// Sum the series until the next term is negligible relative to the total.
		var t = p;
		var a = n;
		while (t > 1e-15 * p) { a = a + 2; t = t * x / a; p = p + t; }

		// p is now the lower-tail probability.
		return 1 - p;
	}

	// Two-tailed probability for a standard normal deviate z, i.e. the chance
	// that |Z| is at least |z|.
	//   z - normal deviate (sign is ignored)
	// For |z| <= 7 this is ChiSq(z*z, 1); beyond that a closed-form tail
	// expansion is used instead.
	function Norm(z) {
		var q = z * z;
		var absZ = Math.abs(z);
		if (absZ > 7) {
			return (1 - 1 / q + 3 / (q * q)) * Math.exp(-q / 2) / (absZ * Math.sqrt(Math.PI / 2));
		}
		return ChiSq(q, 1);
	}

	// Format x with four decimal places (rounded half away from zero) behind a
	// leading space, keeping at most the last 10 characters of the result.
	function Fmt(x) {
		var shifted = (x >= 0) ? (x + 0.00005) : (x - 0.00005);
		var text = ' ' + shifted;
		var cut = text.indexOf('.') + 5;
		var trimmed = text.substring(0, cut);
		return trimmed.slice(-10);
	}

	// Text form of x behind a leading space, cut to its last 3 characters.
	function Fmt3(x) {
		var text = " " + x;
		return text.slice(-3);
	}

	// Text form of x behind a leading space, cut to its last 9 characters.
	function Fmt9(x) {
		var text = " " + x;
		return text.slice(-9);
	}

	// Format x with six decimal places (rounded half away from zero) behind a
	// leading space, keeping at most the last 14 characters of the result.
	function vFmt(x) {
		var shifted = (x >= 0) ? (x + 0.0000005) : (x - 0.0000005);
		var v = ' ' + shifted;
		if (v.indexOf('e-') > -1) {
			// Magnitudes below 1e-6 are printed in exponent form ("5e-7"), which
			// has no usable decimal point. All six shown decimals are zero there.
			v = (shifted < 0) ? ' -0.000000' : ' 0.000000';
		} else {
			v = v.substring(0, v.indexOf('.') + 7);
		}
		return v.substring(v.length - 14, v.length);
	}

	// Return a copy of s with every occurrence of `from` replaced by `to`.
	//   s    - text to translate
	//   from - substring to look for; an empty string leaves s unchanged
	//   to   - replacement text (may itself contain `from`)
	function Xlate(s,from,to) {
		// An empty search string has nothing to replace.
		if (from === '') { return s; }
		// split/join replaces in one pass, so it terminates even when `to`
		// contains `from`, and it removes the whole of a multi-character `from`.
		return s.split(from).join(to);
	}

	// Constructor for a zero-filled, array-like object: sets this.length to n
	// and stores 0 at every index from 0 up to length-1.
	function crArr(n) {
		this.length = n;
		var slot = 0;
		while (slot < this.length) {
			this[slot] = 0;
			slot++;
		}
	}

	// Flat index of row j, column k in a row-major table that is nCols wide.
	function ix(j,k,nCols) {
		var rowStart = j * nCols;
		return rowStart + k;
	}

	// Control characters used when parsing the data window and building output.
	// Written as string escapes rather than the deprecated unescape() call.
	var CR = "\r";     // carriage return
	var LF = "\n";     // line feed
	var Tb = "\t";     // horizontal tab
	var NL = CR + LF;  // default line terminator

	// Fit a logistic regression to the data typed into `form` and write a text
	// report into form.output.
	//
	// Form fields read:    cPts (number of data lines), cVar (number of
	//                      predictors), Grouped (checkbox), data (textarea).
	// Form fields written: data (tabs replaced by commas), output (the report).
	//
	// Each data line holds the predictor values followed by either one 0/1
	// outcome, or - when Grouped is checked - two counts: cases with Y=0, then
	// cases with Y=1. Predictors are standardised, the coefficients are found by
	// Newton-Raphson iteration on -2 log likelihood, and the results are
	// converted back to the original scale before they are reported.
	function Iterate(form) {

		var i = 0; var j = 0; var k = 0; var l = 0;
		var x = 0;      // scratch value for the field/predictor being processed
		var q = 0;      // per-case weight p*(1-p) used in the information matrix
		var LLn = 0;    // -2 log likelihood of the null (intercept-only) model

		// NOTE(review): eval() executes whatever text the user typed. It is kept
		// so that existing inputs continue to be accepted; consider replacing it
		// with Number()/parseFloat() if only plain numbers need to be supported.
		var nC = eval(form.cPts.value);
		var nR = eval(form.cVar.value);
		var nP = nR + 1;
		var nP1 = nP + 1;
		var xCols = nR + 1;   // row width of X (constant column + predictors)
		var aCols = nR + 2;   // row width of Arr (matrix + right-hand-side column)
		var grouped = ( form.Grouped.checked=="1" );
		var sY0 = 0;
		var sY1 = 0;
		var sC = 0;

		var X = new crArr( nC * ( nR + 1 ) );
		var Y0 = new crArr( nC );
		var Y1 = new crArr( nC );
		var xM = new crArr( nR + 1 );
		var xSD = new crArr( nR + 1 );
		var Par = new crArr( nP );
		var SEP = new crArr( nP );
		var Arr = new crArr( nP * nP1 );

		var da = Xlate(form.data.value,Tb,",");
		form.data.value = da;

		// Detect the line terminator used by the pasted data. A local is used so
		// that one run does not change the terminator seen by the next run.
		var eol = CR + LF;
		if( da.indexOf(eol)==-1 ) { if( da.indexOf(CR)>-1 ) { eol = CR } else { eol = LF } }

		// ---- Read the data, accumulating weighted sums for mean and SD ----
		for (i = 0; i<nC; i++) {
			X[ix(i,0,xCols)] = 1;
			l = da.indexOf(eol); if( l==-1 ) { l = da.length }
			var row = da.substring(0,l);
			da = da.substring(l+eol.length,da.length);
			for (j = 1; j<=nR; j++) {
				l = row.indexOf(","); if( l==-1 ) { l = row.length }
				x = eval(row.substring(0,l));
				X[ix(i,j,xCols)] = x;
				row = row.substring(l+1,row.length);
			}
			if( grouped ) {
				l = row.indexOf(","); if( l==-1 ) { l = row.length }
				x = eval(row.substring(0,l));
				Y0[i] = x; sY0 = sY0 + x;
				row = row.substring(l+1,row.length);
				l = row.indexOf(","); if( l==-1 ) { l = row.length }
				x = eval(row.substring(0,l));
				Y1[i] = x; sY1 = sY1 + x;
				row = row.substring(l+1,row.length);
			} else {
				// Locate the end of the outcome field afresh. Reusing the offset
				// left over from the previous field misreads lines such as "0, 0".
				l = row.indexOf(","); if( l==-1 ) { l = row.length }
				x = eval(row.substring(0,l));
				if ( x==0 ) { Y0[i] = 1; sY0 = sY0 + 1 } else { Y1[i] = 1; sY1 = sY1 + 1 }
			}
			sC = sC + (Y0[i] + Y1[i]);
			for (j = 1; j<=nR; j++) {
				x = X[ix(i,j,xCols)];
				xM[j] = xM[j] + (Y0[i] + Y1[i])*x;
				xSD[j] = xSD[j] + (Y0[i] + Y1[i])*x*x;
			}
		}

		// ---- Descriptive statistics ----
		var o = "Descriptives..." + eol;

		o = o + ( eol + sY0 + " cases have Y=0; " + sY1 + " cases have Y=1." + eol );

		o = o + ( eol + " Variable Avg SD " + eol );
		for (j = 1; j<=nR; j++) {
			xM[j] = xM[j] / sC;
			xSD[j] = xSD[j] / sC;
			xSD[j] = Sqrt( Abs( xSD[j] - xM[j] * xM[j] ) );
			o = o + ( " " + Fmt3(j) + " " + Fmt(xM[j]) + Fmt(xSD[j])+ eol );
		}
		xM[0] = 0; xSD[0] = 1;

		// Standardise every predictor to mean 0, SD 1.
		for (i = 0; i<nC; i++) {
			for (j = 1; j<=nR; j++) {
				X[ix(i,j,xCols)] = ( X[ix(i,j,xCols)] - xM[j] ) / xSD[j];
			}
		}

		o = o + ( eol + "Iteration History..." );
		form.output.value = o;

		// ---- Starting values: intercept = log odds of Y=1, slopes = 0 ----
		Par[0] = Ln( sY1 / sY0 );
		for (j = 1; j<=nR; j++) {
			Par[j] = 0;
		}

		var LnV = 0; var Ln1mV = 0;

		var LLp = 2e+10;
		var LL = 1e+10;

		// ---- Iterate until -2 log likelihood stops changing ----
		while( Abs(LLp-LL)>0.0000001 ) {
			LLp = LL;
			LL = 0;

			// Clear the upper triangle and the right-hand-side column.
			for (j = 0; j<=nR; j++) {
				for (k = j; k<=nR+1; k++) {
					Arr[ix(j,k,aCols)] = 0;
				}
			}

			for (i = 0; i<nC; i++) {
				// Linear predictor for case i.
				var v = Par[0];
				for (j = 1; j<=nR; j++) {
					v = v + Par[j] * X[ix(i,j,xCols)];
				}
				// Turn it into a probability v, with separate formulas for
				// extreme values of the linear predictor.
				if( v>15 ) { LnV = -Exp(-v); Ln1mV = -v; q = Exp(-v); v=Exp(LnV) }
				else {
					if( v<-15 ) { LnV = v; Ln1mV = -Exp(v); q = Exp(v); v=Exp(LnV) }
					else { v = 1 / ( 1 + Exp(-v) ); LnV = Ln(v); Ln1mV = Ln(1-v); q = v*(1-v) }
				}
				LL = LL - 2*Y1[i]*LnV - 2*Y0[i]*Ln1mV;
				for (j = 0; j<=nR; j++) {
					var xij = X[ix(i,j,xCols)];
					// Right-hand side: gradient of the log likelihood.
					Arr[ix(j,nR+1,aCols)] = Arr[ix(j,nR+1,aCols)] + xij * ( Y1[i] * (1 - v) + Y0[i] * (-v) );
					// Upper triangle of the weighted cross-product matrix.
					for (k=j; k<=nR; k++) {
						Arr[ix(j,k,aCols)] = Arr[ix(j,k,aCols)] + xij * X[ix(i,k,xCols)] * q * (Y0[i] + Y1[i]);
					}
				}
			}

			o = o + ( eol + "-2 Log Likelihood = " + Fmt( LL ) );
			// The first pass uses the starting values, i.e. the null model.
			if( LLp==1e+10 ) { LLn = LL; o = o + " (Null Model)" }
			form.output.value = o;

			// Mirror the upper triangle into the lower triangle.
			for (j = 1; j<=nR; j++) {
				for (k=0; k<j; k++) {
					Arr[ix(j,k,aCols)] = Arr[ix(k,j,aCols)];
				}
			}

			// In-place Gauss-Jordan elimination: the matrix part becomes its own
			// inverse and the last column becomes the parameter update.
			for (i=0; i<=nR; i++) {
				var s = Arr[ix(i,i,aCols)]; Arr[ix(i,i,aCols)] = 1;
				for (k=0; k<=nR+1; k++) { Arr[ix(i,k,aCols)] = Arr[ix(i,k,aCols)] / s; }
				for (j=0; j<=nR; j++) {
					if (i!=j) {
						s = Arr[ix(j,i,aCols)]; Arr[ix(j,i,aCols)] = 0;
						for (k=0; k<=nR+1; k++) {
							Arr[ix(j,k,aCols)] = Arr[ix(j,k,aCols)] - s * Arr[ix(i,k,aCols)];
						}
					}
				}
			}

			for( j=0; j<=nR; j++) {
				Par[j] = Par[j] + Arr[ix(j,nR+1,aCols)];
			}

		}

		// ---- Overall fit ----
		o = o + ( " (Converged)" + eol );
		var CSq = LLn - LL;
		o = o + ( eol + "Overall Model Fit..." + eol + " Chi Square=" + Fmt(CSq) + "; df=" + nR + "; p=" + Fmt(ChiSq(CSq,nR)) + eol );

		// ---- Coefficients, converted back to the original predictor scale ----
		o = o + ( eol + "Coefficients and Standard Errors..." + eol );
		o = o + ( " Variable Coeff. StdErr p" + eol );
		for( j=1; j<=nR; j++) {
			Par[j] = Par[j] / xSD[j];
			SEP[j] = Sqrt( Arr[ix(j,j,aCols)] ) / xSD[j];
			Par[0] = Par[0] - Par[j] * xM[j];
			o = o + ( " " + Fmt3(j) + " " + Fmt(Par[j]) + Fmt(SEP[j]) + Fmt( Norm(Abs(Par[j]/SEP[j])) ) + eol );
		}
		o = o + ( "Intercept " + Fmt(Par[0]) + eol );

		// ---- Odds ratios with 95% confidence limits ----
		o = o + ( eol + "Odds Ratios and 95% Confidence Intervals..." + eol );
		o = o + ( " Variable O.R. Low -- High" + eol );
		for( j=1; j<=nR; j++) {
			var ORc = Exp( Par[j] );
			var ORl = Exp( Par[j] - 1.96 * SEP[j] );
			var ORh = Exp( Par[j] + 1.96 * SEP[j] );
			o = o + ( " " + Fmt3(j) + " " + Fmt(ORc) + Fmt(ORl) + Fmt(ORh) + eol + eol );
		}

		// ---- Per-line listing with the fitted probability ----
		for (j = 1; j<=nR; j++) {
			var label = " X" + j;
			o = o + label.substring(label.length-10,label.length);
		}
		if( grouped )
			{ o = o + ( " n0 n1 Calc Prob" + eol ) }
		else
			{ o = o + ( " Y Calc Prob" + eol ) }
		for (i = 0; i<nC; i++) {
			var fitted = Par[0];
			for (j = 1; j<=nR; j++) {
				// Undo the standardisation to recover the value as entered.
				x = xM[j] + xSD[j] * X[ix(i,j,xCols)];
				fitted = fitted + Par[j] * x;
				o = o + Fmt(x);
			}
			fitted = 1 / ( 1 + Exp( -fitted ) );
			if( grouped )
				{ o = o + ( " " + Fmt9(Y0[i]) + " " + Fmt9(Y1[i]) + Fmt(fitted) + eol ) }
			else
				{ o = o + ( " " + Fmt9(Y1[i]) + Fmt(fitted) + eol ) }
		}

		form.output.value = o;

	}

	<!-- done hiding from old browsers -->
	</SCRIPT>
	<STYLE>
	<!--
	div.Section1
	{page:Section1;}
	span.SpellE
	{}
	-->
	</STYLE>
	<TITLE>Logistic Regression Calculating Page</TITLE>
	</HEAD>
	<BODY bgcolor="#c0c0c0">
	<CENTER>
	<H2>
	<FONT color="#0000ff" face="Arial">Logistic Regression </FONT>
	</H2>
	<P>
	<FONT face="Arial"> <SMALL><I>by John C. Pezzullo,
	</I></SMALL><I><SMALL>instruction </SMALL><FONT size="2">modifications by
	Kevin M. Sullivan</FONT></I></FONT>
	<P>
	<FONT face="Arial" size="2"><I>Version 05.07.20</I></FONT>
	</CENTER>
	<P>
	<FONT face="Arial">This page performs logistic regression, in which a dichotomous
	outcome is predicted by one or more variables. The program generates the
	coefficients of a prediction formula (and standard errors of estimate and
	significance levels), and odds ratios (with 95% confidence intervals).</FONT>
	<P>
	<HR>
	<HR>
	<FORM method="post">
	<H3>
	<FONT color="#0000ff" face="Arial"><A name="Instructions">Instructions</A>:</FONT>
	</H3>
	<OL>
	<LI>
	<P style="margin-top: 0pt; margin-bottom: 0pt;">
	<FONT face="Arial">Enter the <FONT color="#0000ff">number of data
	points</FONT>:
	<INPUT name="cPts" value="10" size="4" type="text"> (or, if summary data,
	the number of lines of data). </FONT>
	<LI>
	<P style="margin-top: 0pt; margin-bottom: 0pt;">
	<FONT face="Arial">Enter the <FONT color="#0000ff">number of predictor
	variables</FONT>:
	<INPUT name="cVar" value="1" size="2" type="text"> </FONT>
	<LI>
	<P style="margin-top: 0pt; margin-bottom: 0pt;">
	<FONT face="Arial">If summary data, check here
	<INPUT name="Grouped" value="1" type="checkbox"> </FONT>
	<LI>
	<P style="margin-top: 0pt; margin-bottom: 0pt;">
	<FONT face="Arial">Type or paste data in the <B>Data Window</B> below (see
	lower section on page concerning issues on data formatting)</FONT>
	</OL>
	<P style="margin-top: 0pt; margin-bottom: 0pt;">
	<FONT color="#0000ff" face="Arial Black">Data Window</FONT>
	<P style="margin-top: 0pt; margin-bottom: 0pt;">
	<TEXTAREA name="data" rows="14" cols="80">1,0
	2,0
	3,0
	4,0
	5,1
	6,0
	7,1
	8,0
	9,1
	10,1
	</TEXTAREA><BR>

	<OL start="5">
	<LI>
	<P style="margin-top: 0pt; margin-bottom: 0pt;">
	<FONT color="#0000ff" face="Arial">Click the
	<INPUT value="Solve" onclick="Iterate(this.form)" type="button"></FONT><FONT
	color="Red" face="Arial"><FONT color="#0000ff" face="Arial"> button;</FONT>
	</FONT><FONT face="Arial">results will appear in the <B>Results Window</B>
	below:</FONT>
	</OL>
	<P style="margin-top: 0pt; margin-bottom: 0pt;">
	<FONT color="#0000ff" face="Arial Black"><B>Results Window</B></FONT>
	<P style="margin-top: 0pt; margin-bottom: 0pt;">
	<TEXTAREA name="output" rows="11" cols="80"></TEXTAREA><BR>
	<OL start="6">
	<LI>
	<P style="margin-top: 0pt; margin-bottom: 0pt;">
	<FONT face="Arial">To print out results, copy (Ctrl-C) and paste (Ctrl-V)
	the contents of the <B>Results Window</B> to a word processor or text editor,
	then print the results from that program. For best appearance, specify a
	fixed-width font like Courier. </FONT>
	</OL>
	<P>
	<HR>
	<H3>
	<FONT color="#0000ff" face="Arial">Data Examples</FONT>
	</H3>
	<P>
	<FONT face="Arial">A number of examples are provided on the format to enter
	data.  All examples are based on the Evans County data set described
	in Kleinbaum, Kupper, and Morgenstern, <I>Epidemiologic Research: Principles
	and Quantitative Methods</I>, New York: Van Nostrand Reinhold, 1982.
	The Evans County study was a cohort study of men followed for 7 years.
	The files are also available as text files to allow the user to cut and paste
	the example data into the Data Window.</FONT>
	<P>
	<FONT face="Arial">Data can be in two formats - records at the <B>
	individual</B> level (one record for each individual or whatever the unit
	of analysis) or the data could be <B> summary</B> information, such as the
	number of individuals at an exposure level with<I>out</I> disease and the
	number with disease.  The data on one line must be separated by a tab
	or a comma; the examples below use the comma to separate data points.
	These examples first describe data at the individual level, and then describe
	summary data.</FONT>
	<P>
	<FONT face="Arial"><B>Data at the individuals level, one exposure
	variable</B></FONT>
	<P style="margin-top: 0pt; margin-bottom: 0pt;">
	<FONT face="Arial">Enter or paste into the Data Window a dichotomous
	exposure variable (coded as 1 for exposed and 0 for unexposed) and the outcome
	variable (coded as 1 for those with the outcome and 0 for those without it),
	with the two values on each line separated by a "," or a tab.  For example, in
	assessing the relationship between an elevated catecholamine level (the exposure
	of interest, 1= elevated and 0= normal) and coronary heart disease (CHD,
	the outcome of interest), the records would be formatted as numeric values
	for:</FONT>
	<P style="margin-top: 0pt; margin-bottom: 0pt;">
	<FONT face="Arial" size="2">exposure variable value, outcome variable
	value</FONT>
	<P>
	<FONT face="Arial">For this example data the number of data points is 609
	and the number of predictor variables is 1.  The first 10 records from
	the example data are shown below:</FONT>
	<P style="word-spacing: 0pt; line-height: 100%; margin-top: 0pt; margin-bottom: 0pt;">
	<FONT face="Arial" size="2">0, 0<BR>
	0, 0<BR>
	1, 1<BR>
	1, 0<BR>
	0, 0<BR>
	0, 0<BR>
	0, 1<BR>
	0, 0<BR>
	0, 0<BR>
	0, 0 </FONT>
	<P style="word-spacing: 0pt; line-height: 100%; margin-top: 0pt; margin-bottom: 0pt;">
	<FONT face="Arial" size="2">... (plus 599 additional lines)</FONT>
	<P style="word-spacing: 0pt; line-height: 100%; margin-top: 0pt; margin-bottom: 0pt;">

	<P>
	<FONT face="Arial">The full data file as a text file can be found
	<A href="http://www.sph.emory.edu/%7Ecdckms/Logistic/cach.txt">here</A>.
	The results of the analysis would be:</FONT>
	<P>
	<FONT face="Courier New" size="2">Odds Ratios and 95% Confidence
	Intervals...<BR>
	Variable  O.R.    Low -- High<BR>
	1         2.8615 1.6878
	4.8514</FONT><BR>

	<P>
	<FONT face="Arial">The interpretation would be that individuals with elevated
	catecholamine levels have 2.8615 times the odds of developing CHD compared
	to individuals with normal catecholamine levels.</FONT>
	<P>
	<FONT face="Arial"><I>[A note on coding the <B> exposure</B> variable:
	The above example coded the exposed as 1 and unexposed as 0, and the odds
	ratio was calculated  comparing the odds of being coded as 1 to being
	coded as 0 - note that those coded as 0 are the referent group.  If
	you code the exposure as 1 and 2, the smaller number will be treated as the
	referent group, which in this example is 1.  The odds ratio for a 2/1
	coding scheme would be the odds of disease for those coded as 2 compared
	to the odds in those coded as 1.]</I></FONT>
	<P>
	<FONT face="Arial"><I>[A note on coding the <B> outcome</B> variable: The
	outcome variable must be coded as 1 for those with the outcome and 0 for
	those without the outcome.]</I></FONT>
	<P>
	<FONT face="Arial">If the exposure variable is continuous, you can use the
	numeric value (which assumes the relationship is linear on a logit scale).
	For example, in assessing the relationship between age and CHD, the number
	of data points is 609 and the number of predictor variables is 1, and the
	first ten records would look as shown below (data as a text file can
	be found
	<A href="http://www.sph.emory.edu/%7Ecdckms/Logistic/agch.txt">here</A>):</FONT>
	<P style="word-spacing: 0pt; line-height: 100%; margin-top: 0pt; margin-bottom: 0pt;">
	<FONT face="Arial" size="2">56, 0<BR>
	43, 0<BR>
	56, 1<BR>
	64, 0<BR>
	49, 0<BR>
	46, 0<BR>
	52, 1<BR>
	63, 0<BR>
	42, 0<BR>
	55, 0</FONT>
	<P style="word-spacing: 0pt; line-height: 100%; margin-top: 0pt; margin-bottom: 0pt;">
	<FONT face="Arial" size="2">... (plus 599 additional lines)</FONT>
	<P>
	<FONT face="Arial">The results of the analysis would be:</FONT>
	<P>
	<FONT face="Courier New" size="2">Odds Ratios and 95% Confidence
	Intervals...<BR>
	Variable  O.R.    Low -- High<BR>
	1         1.0454 1.0189 1.0727</FONT>
	<P>
	<FONT face="Arial" size="2">T</FONT><FONT face="Arial">he interpretation
	would be that for every one year increase in age, the odds of CHD increased
	by a factor of 1.0454 (or by about 4.5%).</FONT>
	<P>
	<FONT face="Arial"><B>Data at the individuals level, two exposure variables
	- no interaction model</B></FONT>
	<P>
	<FONT face="Arial">If there is more than one exposure variable, list the
	exposure variables first and the outcome variable last.  For example,
	say the investigator wants to determine the simultaneous effect of catecholamine
	and cigarette smoking (1=smoker, 0=nonsmoker) on CHD, the data would be:</FONT>
	<P>
	<FONT face="Arial" size="2">first exposure variable value, second exposure
	variable value, outcome variable value</FONT>
	<P>
	<FONT face="Arial">For this example data the number of data points is 609
	and the number of predictor variables is 2.  The first 10 records from
	the example data are shown below with the variable being catecholamine, smoking,
	and CHD and the data in a text file is
	<A href="http://www.sph.emory.edu/%7Ecdckms/Logistic/casmch.txt">here</A>:</FONT>
	<P style="word-spacing: 0pt; line-height: 100%; margin-top: 0pt; margin-bottom: 0pt;">
	<FONT face="Arial" size="2">0, 0, 0<BR>
	0, 1, 0<BR>
	1, 1, 1<BR>
	1, 1, 0<BR>
	0, 1, 0<BR>
	0, 1, 0<BR>
	0, 1, 1<BR>
	0, 0, 0<BR>
	0, 1, 0<BR>
	0, 0, 0</FONT>
	<P style="word-spacing: 0pt; line-height: 100%; margin-top: 0pt; margin-bottom: 0pt;">
	<FONT face="Arial" size="2">... (plus 599 additional lines)</FONT>
	<P>
	<FONT face="Arial">The results of the analysis would be:</FONT>
	<P>
	<FONT face="Courier New" size="2">Odds Ratios and 95% Confidence
	Intervals...<BR>
	Variable   O.R.   Low -- High<BR>
	1         2.9074 1.7079 4.9492<BR>
	2         2.0000 1.1206 3.5695</FONT>
	<P>
	<FONT face="Arial" size="2">T</FONT><FONT face="Arial">he interpretation
	would be that individuals with an elevated catecholamine level ("Variable
	1" in the above output) have an odds of CHD about 2.9 times greater than
	those with normal catecholamine levels controlling for cigarette smoking.
	Cigarette smokers ("Variable 2" in the above output) have twice the odds
	(2.0) of CHD compared to nonsmokers controlling for catecholamine (elevated
	vs. normal).</FONT>
	<P>
	<FONT face="Arial"><B>Data at the individuals level, two exposure variables
	- interaction model</B></FONT>
	<P>
	<FONT face="Arial">If you would like to assess the interaction between two
	variables, there will need to be an interaction term.  Using the data
	from the previous example, the question might be whether cigarette smoking
	modifies the catecholamine->CHD relationship.  The interaction term
	is simply multiplying the value for catecholamine times the value for smoking,
	of which there are only four possibilities with these two variables:</FONT>
	<TABLE style="border-collapse: collapse;" id="AutoNumber1" border="1" bordercolor="#111111"
	cellpadding="0" cellspacing="0" width="47%">
	<TR>
	<TD align="center" width="19%"><FONT face="Arial" size="2">Catecholamine</FONT></TD>
	<TD align="center" width="20%"> </TD>
	<TD align="center" width="20%"><FONT face="Arial" size="2">Smoking</FONT></TD>
	<TD align="center" width="20%"> </TD>
	<TD align="center" width="20%"><FONT face="Arial" size="2">Interaction</FONT></TD>
	</TR>
	<TR>
	<TD align="center" width="19%"><FONT face="Arial" size="2">1</FONT></TD>
	<TD align="center" width="20%"><FONT face="Arial" size="2">x</FONT></TD>
	<TD align="center" width="20%"><FONT face="Arial" size="2">1</FONT></TD>
	<TD align="center" width="20%"><FONT face="Arial" size="2">=</FONT></TD>
	<TD align="center" width="20%"><FONT face="Arial" size="2">1</FONT></TD>
	</TR>
	<TR>
	<TD align="center" width="19%"><FONT face="Arial" size="2">1</FONT></TD>
	<TD align="center" width="20%"><FONT face="Arial" size="2">x</FONT></TD>
	<TD align="center" width="20%"><FONT face="Arial" size="2">0</FONT></TD>
	<TD align="center" width="20%"><FONT face="Arial" size="2">=</FONT></TD>
	<TD align="center" width="20%"><FONT face="Arial" size="2">0</FONT></TD>
	</TR>
	<TR>
	<TD align="center" width="19%"><FONT face="Arial" size="2">0</FONT></TD>
	<TD align="center" width="20%"><FONT face="Arial" size="2">x</FONT></TD>
	<TD align="center" width="20%"><FONT face="Arial" size="2">1</FONT></TD>
	<TD align="center" width="20%"><FONT face="Arial" size="2">=</FONT></TD>
	<TD align="center" width="20%"><FONT face="Arial" size="2">0</FONT></TD>
	</TR>
	<TR>
	<TD align="center" width="19%"><FONT face="Arial" size="2">0</FONT></TD>
	<TD align="center" width="20%"><FONT face="Arial" size="2">x</FONT></TD>
	<TD align="center" width="20%"><FONT face="Arial" size="2">0</FONT></TD>
	<TD align="center" width="20%"><FONT face="Arial" size="2">=</FONT></TD>
	<TD align="center" width="20%"><FONT face="Arial" size="2">0</FONT></TD>
	</TR>
	</TABLE>
	<P>
	<FONT face="Arial">The data would be in the following format:</FONT>
	<P>
	<FONT face="Arial" size="2">first exposure variable value, second exposure
	variable value, interaction value, outcome variable value</FONT>
	<P>
	<FONT face="Arial">For this example data the number of data points is 609
	and the number of predictor variables is 3.  The first 10 records from
	the example data are shown below with the variables being catecholamine,
	smoking, the catecholamine-smoking interaction, and CHD and the data file
	as text can be found
	<A href="http://www.sph.emory.edu/%7Ecdckms/Logistic/casmich.txt">here</A>:</FONT>
	<P style="word-spacing: 0pt; line-height: 100%; margin-top: 0pt; margin-bottom: 0pt;">
	<FONT face="Arial" size="2">0, 0, 0, 0<BR>
	0, 1, 0, 0<BR>
	1, 1, 1, 1<BR>
	1, 1, 1, 0<BR>
	0, 1, 0, 0<BR>
	0, 1, 0, 0<BR>
	0, 1, 0, 1<BR>
	0, 0, 0, 0<BR>
	0, 1, 0, 0<BR>
	0, 0, 0, 0</FONT>
	<P style="word-spacing: 0pt; line-height: 100%; margin-top: 0pt; margin-bottom: 0pt;">
	<FONT face="Arial" size="2">... (plus 599 additional lines)</FONT>
	<P>
	<FONT face="Arial">The results of the analysis would be:</FONT>
	<P>
	<FONT face="Courier New" size="2">Coefficients and Standard Errors...<BR>
	Variable   Coeff. StdErr    p<BR>
	1          1.3953 0.5187
	0.0072<BR>
	2          0.8653 0.3864
	0.0251<BR>
	3         -0.4498 0.6092 0.4603<BR>
	Intercept -2.9267<BR>
	<BR>
	Odds Ratios and 95% Confidence Intervals...<BR>
	Variable    O.R.   Low -- High<BR>
	1          4.0360 1.4601
	11.1562<BR>
	2          2.3758 1.1141
	5.0661<BR>
	3          0.6377 0.1932
	2.1049</FONT>
	<P>
	<FONT face="Arial" size="2">T</FONT><FONT face="Arial">he interpretation
	would be that the interaction is not statistically significant (p-value for
	variable 3 = 0.4603) and could be removed from the model.  Another way
	to tell that the interaction is not significant is based on the odds ratio
	confidence interval for the interaction term; the null value (when there
	is no interaction) for an interaction term is 1; the 95% confidence interval
	for the odds ratio around the interaction term goes from 0.1932 to 2.1049
	which includes the "null value" of 1. </FONT>
	<P>
	<FONT face="Arial"><B>Summary data, one exposure variable</B></FONT>
	<P>
	<FONT face="Arial">This program can also analyze summary data.  For
	example, the table below summarizes information on 609 individuals by exposure
	(catecholamine) and disease (CHD):</FONT>
	<TABLE style="border-collapse: collapse;" id="AutoNumber2" border="1" bordercolor="#111111"
	cellpadding="0" cellspacing="0" height="69" width="46%">
	<TR>
	<TD height="10" width="35%"><FONT face="Arial">Elevated Catecholamine?</FONT></TD>
	<TD colspan="2" align="center" height="10" width="40%"><FONT face="Arial"
	size="2">CHD (Disease variable)</FONT></TD>
	</TR>
	<TR>
	<TD height="18" width="35%"><FONT face="Arial" size="2">(Exposure
	variable)</FONT></TD>
	<TD align="center" height="18" width="19%"><FONT face="Arial" size="2">Yes
	(1)</FONT></TD>
	<TD align="center" height="18" width="21%"><FONT face="Arial" size="2">No
	(0)</FONT></TD>
	</TR>
	<TR>
	<TD height="19" width="35%"><FONT face="Arial" size="2">
	Yes (1)</FONT></TD>
	<TD align="center" height="19" width="19%"><FONT face="Arial" size="2">27</FONT></TD>
	<TD align="center" height="19" width="21%"><FONT face="Arial" size="2">95</FONT></TD>
	</TR>
	<TR>
	<TD height="19" width="35%"><FONT face="Arial" size="2">
	No (0)</FONT></TD>
	<TD align="center" height="19" width="19%"><FONT face="Arial" size="2">44</FONT></TD>
	<TD align="center" height="19" width="21%"><FONT face="Arial" size="2">443</FONT></TD>
	</TR>
	</TABLE>
	<P>
	<FONT face="Arial">The data can be entered as summary data in two lines in
	the format:</FONT>
	<P>
	<FONT face="Arial">exposure variable level, number <I>
	with<B><U>out</U></B></I> disease at this exposure level, number <I>with</I>
	disease at this exposure level</FONT>
	<P>
	<FONT face="Arial">For this example data the number of data points is 2,
	the number of predictor variables is 1, and check the summary data box.
	The complete example data are shown below with the variable being exposure
	category, number without CHD in exposure category, and number with CHD in
	exposure category.  You could copy these data and paste them in the
	Data Window.</FONT>
	<P>
	<FONT size="2"><FONT face="Arial">1, 95, 27</FONT></FONT><BR>
	<FONT size="2"><FONT face="Arial">0, 443, 44</FONT></FONT>
	<P>
	<FONT size="2"> <FONT face="Arial">Th</FONT></FONT><FONT face="Arial">e results
	of the analysis would be as follows, exactly the same as the <B><I>Data at
	the individuals level, one exposure variable </I></B>example shown previously
	based on the same data.</FONT>
	<P>
	<FONT face="Courier New" size="2">Odds Ratios and 95% Confidence
	Intervals...<BR>
	Variable  O.R.    Low -- High<BR>
	1         2.8615 1.6878 4.8514</FONT>
	<P>
	<B><FONT face="Arial">Summary data, two exposure variables</FONT></B>
	<P style="margin-top: 0pt; margin-bottom: 0pt;">
	<FONT face="Arial">This example describes a situation where there
	are two exposure variables, one considered as the primary exposure of interest
	and another as potentially an effect modifier, confounder, significant
	independent exposure, or none of these.  As an example, investigators
	are interested in the relationship between an elevated catecholamine level and
	CHD, but want to determine if this relationship is affected by the smoking
	status of the individual.  The data are as follows:</FONT>
	<P style="margin-top: 0pt; margin-bottom: 0pt;">
	<B><FONT face="Arial">Smoke = Yes (1)</FONT></B>
	<TABLE style="border-collapse: collapse;" id="AutoNumber3" border="1" bordercolor="#111111"
	cellpadding="0" cellspacing="0" height="69" width="46%">
	<TR>
	<TD height="10" width="36%"><FONT face="Arial">Elevated Catecholamine?</FONT></TD>
	<TD colspan="2" align="center" height="10" width="39%"><FONT face="Arial"
	size="2">CHD (Disease variable)</FONT></TD>
	</TR>
	<TR>
	<TD height="18" width="36%"><FONT face="Arial" size="2">(Exposure
	variable)</FONT></TD>
	<TD align="center" height="18" width="19%"><FONT face="Arial" size="2">Yes
	(1)</FONT></TD>
	<TD align="center" height="18" width="20%"><FONT face="Arial" size="2">No
	(0)</FONT></TD>
	</TR>
	<TR>
	<TD height="19" width="36%"><FONT face="Arial" size="2">
	Yes (1)</FONT></TD>
	<TD align="center" height="19" width="19%"><FONT face="Arial" size="2">19</FONT></TD>
	<TD align="center" height="19" width="20%"><FONT face="Arial" size="2">58</FONT></TD>
	</TR>
	<TR>
	<TD height="19" width="36%"><FONT face="Arial" size="2">
	No (0)</FONT></TD>
	<TD align="center" height="19" width="19%"><FONT face="Arial" size="2">35</FONT></TD>
	<TD align="center" height="19" width="20%"><FONT face="Arial" size="2">275</FONT></TD>
	</TR>
	</TABLE>
	<P style="margin-top: 0pt; margin-bottom: 0pt;">
	<FONT face="Arial"><B>Smoke = No (0)</B></FONT>
	<TABLE style="border-collapse: collapse;" id="AutoNumber4" border="1" bordercolor="#111111"
	cellpadding="0" cellspacing="0" height="69" width="46%">
	<TR>
	<TD height="10" width="37%"><FONT face="Arial">Elevated Catecholamine?</FONT></TD>
	<TD colspan="2" align="center" height="10" width="38%"><FONT face="Arial"
	size="2">CHD (Disease variable)</FONT></TD>
	</TR>
	<TR>
	<TD height="18" width="37%"><FONT face="Arial" size="2">(Exposure
	variable)</FONT></TD>
	<TD align="center" height="18" width="19%"><FONT face="Arial" size="2">Yes
	(1)</FONT></TD>
	<TD align="center" height="18" width="19%"><FONT face="Arial" size="2">No
	(0)</FONT></TD>
	</TR>
	<TR>
	<TD height="19" width="37%"><FONT face="Arial" size="2">
	Yes (1)</FONT></TD>
	<TD align="center" height="19" width="19%"><FONT face="Arial" size="2">8</FONT></TD>
	<TD align="center" height="19" width="19%"><FONT face="Arial" size="2">37</FONT></TD>
	</TR>
	<TR>
	<TD height="19" width="37%"><FONT face="Arial" size="2">
	No (0)</FONT></TD>
	<TD align="center" height="19" width="19%"><FONT face="Arial" size="2">9</FONT></TD>
	<TD align="center" height="19" width="19%"><FONT face="Arial" size="2">168</FONT></TD>
	</TR>
	</TABLE>
	<P>
	<FONT face="Arial">First, to see if smoking modifies the catecholamine->CHD
	relationship, enter data to determine if the interaction between catecholamine
	and smoking is statistically significant.  The interaction level would
	be determined similarly to that described previously.</FONT>
	<P>
	<FONT face="Arial">exposure variable 1 level, exposure variable 2 level,
	interaction level, number <I> with<B><U>out</U></B></I> disease at this level,
	number <I>with</I> disease at this level.</FONT>
	<P>
	<FONT face="Arial">For this example data the number of data points is 4,
	the number of predictor variables is 3, and check the summary data box.
	The complete example data are shown below with the variables being catecholamine
	category, smoking category, interaction category, number without CHD at these
	levels, and number with CHD at these levels.  You could copy these data
	and paste them in the Data Window.</FONT>
	<P>
	<FONT face="Arial" size="2">1, 1, 1, 58, 19</FONT><BR>
	<FONT face="Arial" size="2">0, 1, 0, 275, 35</FONT><BR>
	<FONT face="Arial" size="2">1, 0, 0, 37, 8</FONT><BR>
	<FONT face="Arial" size="2">0, 0, 0, 168, 9</FONT>
	<P>
	<FONT face="Arial">The results of the analysis would be:</FONT>
	<P>
	<FONT face="Courier New" size="2">Coefficients and Standard Errors...<BR>
	Variable   Coeff. StdErr    p<BR>
	1          1.3953 0.5187
	0.0072<BR>
	2          0.8653 0.3864
	0.0251<BR>
	3         -0.4498 0.6092 0.4603<BR>
	Intercept -2.9267<BR>
	<BR>
	Odds Ratios and 95% Confidence Intervals...<BR>
	Variable    O.R.   Low -- High<BR>
	1          4.0360 1.4601
	11.1562<BR>
	2          2.3758 1.1141
	5.0661<BR>
	3          0.6377 0.1932
	2.1049</FONT>
	<P>
	<FONT face="Arial" size="2">T</FONT><FONT face="Arial">he interpretation
	would be that the interaction is not statistically significant (p-value for
	variable 3 = 0.4603) and could be removed from the model.  </FONT>
	<P>
	<FONT face="Arial">To determine whether smoking confounds the
	catecholamine->CHD association, two odds ratios are needed, a "crude"
	odds ratio from a logistic regression model with just catecholamine as a
	predictor of CHD which was 2.8615, and a logistic regression model with two
	predictors in the model, catecholamine and smoking.  The general format
	for the summary data is:</FONT>
	<P>
	<FONT face="Arial">exposure variable 1 level, exposure variable 2 level,
	number <I> with<B><U>out</U></B></I> disease at this level, number
	<I>with</I> disease at this level</FONT>
	<P>
	<FONT face="Arial">For this example data the number of data points is 4,
	the number of predictor variables is 2, and check the summary data box.
	The complete example data are shown below with the variables being catecholamine
	category, smoking category, number without CHD at these levels, and number
	with CHD at these levels.  You could copy these data and paste them
	in the Data Window.</FONT>
	<P>
	<FONT face="Arial" size="2">1, 1, 58, 19</FONT><BR>
	<FONT face="Arial" size="2">0, 1, 275, 35</FONT><BR>
	<FONT face="Arial" size="2">1, 0, 37, 8</FONT><BR>
	<FONT face="Arial" size="2">0, 0, 168, 9</FONT>
	<P>
	<FONT face="Arial">The results of the analysis would be:</FONT>
	<P>
	<FONT face="Courier New" size="2">Odds Ratios and 95% Confidence
	Intervals...<BR>
	Variable   O.R.   Low -- High<BR>
	1         2.9074 1.7079 4.9492<BR>
	2         2.0000 1.1206 3.5695</FONT>
	<P>
	<FONT face="Arial" size="2">T</FONT><FONT face="Arial">he interpretation
	would be that individuals with an elevated catecholamine level ("Variable
	1" in the above output) have an odds of CHD 2.9074 times greater than those
	with normal catecholamine levels controlling for cigarette smoking.
	Cigarette smokers ("Variable 2" in the above output) have twice the odds
	(2.0000) of CHD compared to nonsmokers controlling for catecholamine (elevated
	vs. normal).  For the question of whether or not smoking confounds the
	catecholamine->CHD association, compare the crude odds ratio (2.8615)
	with the odds ratio adjusted for smoking (2.9074) - as a general rule, if
	these two differ by 10% or more, then confounding is present; if less than
	10%, there is not an important amount of confounding.  (Note that some
	investigators may choose to define confounding differently, perhaps at a
	5% difference.)  In this example, there is little evidence of
	confounding.  However, smoking does seem to be an important independent
	predictor of CHD when controlling for catecholamine.</FONT>
	<P>
	<HR>
	<H3>
	<FONT color="#0000ff" face="Arial">Questions or Problems?</FONT>
	</H3>
	<H4>
	*** <FONT face="Arial"><B>Not getting correct results or blank
	results?</B></FONT>
	</H4>
	<P>
	<FONT face="Arial">If you are getting an error message or no numeric results,
	please ensure the following:</FONT>
	<UL>
	<LI>
	<FONT face="Arial">For each record or line of data, the data must be separated
	by a <B><I> comma</I></B> or <B><I>tab</I></B>; if there are just spaces
	between the data, you will get an error message or output with no calculated
	values.  </FONT>
	<LI>
	<FONT face="Arial">All data values must be numeric - character data (such
	as "Y" or "Yes" or "+") will not work.</FONT>
	<LI>
	<FONT face="Arial">The outcome variable must have a 1/0
	coding.  </FONT>
	<LI>
	<FONT face="Arial">There cannot be any blank lines in the data.</FONT>
	<LI>
	<FONT face="Arial">All records must have values for every exposure
	variable.</FONT>
	</UL>
	<H3>
	*** One (or more) of my coefficients came out very large (and the standard
	error is even larger!). Why did this happen?
	</H3>
	<P>
	This is probably due to what is called "the <I>perfect predictor</I> problem".
	This occurs when one of the predictor variables is perfectly divided into
	two distinct ranges for the two outcomes. For example, if you had an independent
	variable like Age, and everyone <B>above</B> age 50 <B>had</B> the outcome
	event, and everyone 50 and <B>below</B> did <B>not</B> have the event, then
	the logistic algorithm will not converge (the regression coefficient for
	Age will take off toward infinity). The same thing can happen with categorical
	predictors. And it gets even more insidious when there's more than one
	independent variable. None of the variables by themselves may look like "perfect
	predictors", but some subset of them taken together might form a pattern
	in n-dimensional space that can be sliced into two regions where everyone
	in one region had outcome=1 and everyone in the other region had outcome=0.
	This isn't a flaw in the web page; it's actually a situation where the logistic
	model is simply not appropriate for the data. The true relationship is a
	"step function", not the smooth "S-shaped" function of the logistic model.
	<P>
	*** <FONT face="Arial"><B>How do I copy and paste data?</B></FONT>
	<P>
	<FONT face="Arial"><I><B>Copy data</B>:</I>  In most programs, you identify
	the data you want to copy then go to Edit->Copy</FONT>
	<P>
	<FONT face="Arial"><B><I>Paste data</I></B>: Open this logistic regression
	program; place the cursor in the <B>Data Window</B> and highlight the example
	data, then, in Windows, simultaneously press the <B>Ctrl</B> and <B>V </B>keys;
	Mac users press the <B>Command</B> and <B>V</B> keys.</FONT>
	<P>
	<FONT face="Arial"><B>*** Can I copy and paste from Excel?</B></FONT>
	<P>
	<FONT face="Arial">Yes, highlight the columns with the data, Edit->Copy
	the data, and paste into the Logistic <B>Data Window</B>.  Note that
	when you paste data from Excel into the <B>Data Window</B>, the different
	columns of data will be separated by a tab.  You cannot see the tab
	in the <B>Data Window</B>, but you can usually tell the difference between
	a tab and blank spaces by placing the cursor in a line of data, then move
	the cursor to the right one space at a time - a tab will make the cursor
	move many spaces.</FONT>
	<HR>
	<H3>
	<FONT color="#0000ff" face="Arial"><A name="Background">Background Info</A>
	(just what is logistic regression, anyway?):</FONT>
	</H3>
	<P>
	<FONT face="Arial"> <B>Ordinary</B> regression deals with finding a function
	that relates a <B>continuous</B> outcome variable (dependent variable
	<I>y</I>) to one or more predictors (independent variables
	<I>x</I><SUB>1</SUB>, <I>x</I><SUB>2</SUB>, etc.). Simple linear regression
	assumes a function of the form:<BR>
	<I>y</I> = c<SUB>0</SUB> + c<SUB>1</SUB> * <I>x</I><SUB>1</SUB> +
	c<SUB>2</SUB> * <I>x</I><SUB>2</SUB> +...<BR>
	and finds the values of c<SUB>0</SUB>, c<SUB>1</SUB>, c<SUB>2</SUB>, etc.
	(c<SUB>0</SUB> is called the "intercept" or "constant term"). </FONT>
	<P>
	<FONT face="Arial"> <B>Logistic</B> regression is a variation of ordinary
	regression, useful when the observed outcome is <B>restricted to two
	values</B>, which usually represent the occurrence or non-occurrence of some
	outcome event (usually coded as 1 or 0, respectively). It produces a formula
	that predicts the <B>probability of the occurrence</B> as a function of the
	independent variables. </FONT>
	<P>
	<FONT face="Arial">Logistic regression fits a special s-shaped curve by taking
	the linear regression (above), which could produce any <I>y</I>-value between
	minus infinity and plus infinity, and transforming it with the function:<BR>
	<I>p</I> = Exp(<I>y</I>) / ( 1 + Exp(<I>y</I>) )<BR>
	which produces <I>p</I>-values between 0 (as <I>y</I> approaches minus infinity)
	and 1 (as <I>y</I> approaches plus infinity). This now becomes a special
	kind of <I>non-linear</I> regression, which is what this page performs. </FONT>
	<P>
	<FONT face="Arial">Logistic regression also produces <I>Odds Ratios</I> (O.R.)
	associated with each predictor value. The <I>odds</I> of an event is defined
	as the probability of the outcome event <B>occurring</B> divided by the
	probability of the event <B>not occurring</B>. The odds ratio for a predictor
	tells the relative amount by which the odds of the outcome increase (O.R.
	greater than 1.0) or decrease (O.R. less than 1.0) when the value of the
	predictor is increased by 1.0 unit. </FONT>
	<P>
	<HR>
	<H3>
	<P align="left">
	<FONT face="Arial"><B><A name="Techie">Techie-stuff</A> (for those who might
	be interested): </B></FONT>
	</H3>
	<P>
	<FONT face="Arial">This page contains a straightforward <I>JavaScript</I>
	implementation of a standard iterative method to maximize the Log Likelihood
	Function (LLF), defined as the sum of the logarithms of the predicted
	probabilities of occurrence for those cases where the event occurred and
	the logarithms of the predicted probabilities of non-occurrence for those
	cases where the event did not occur. </FONT>
	<P>
	<FONT face="Arial">Maximization is by Newton's method, with a very simple
	elimination algorithm to invert and solve the simultaneous equations.
	Central-limit estimates of parameter standard errors are obtained from the
	diagonal terms of the inverse matrix. Odds Ratios and their confidence limits
	are obtained by exponentiating the parameters and their lower and upper
	confidence limits (approximated by +/- 1.96 standard errors). </FONT>
	<P>
	<FONT face="Arial">No special convergence-acceleration techniques are used.
	For improved precision, the independent variables are temporarily converted
	to "standard scores" ( value - Mean ) / StdDev. The <I>Null Model</I> is
	used as the starting guess for the iterations -- all parameter coefficients
	are zero, and the intercept is the logarithm of the ratio of the number of
	cases with <I>y</I>=1 to the number with <I>y</I>=0. The quantity
	-2*Ln(Likelihood) is displayed for the null model, for each step of the
	iteration, and for the final (converged) model. Convergence is not guaranteed,
	but this page should work properly with most practical problems that arise
	in real-world situations. </FONT>
	<P>
	<FONT face="Arial">This implementation has no predefined limits for the number
	of independent variables or cases. The actual limits are probably dependent
	on your web browser's available memory and other browser-specific restrictions.
	</FONT>
	<P>
	<FONT face="Arial">The fields below are pre-loaded with a very simple example.
	</FONT>
	<P>
	<FONT face="Arial" size="2">Notes: John Pezzullo wrote the program and the
	Instructions, Background Info, and Techie-Stuff sections; Kevin Sullivan
	modified the Instructions slightly and wrote the Data Examples sections.</FONT>
	<P>
	<FONT face="Arial">Reference: <I>Applied Logistic Regression</I>, by D.W.
	Hosmer and S. Lemeshow. 1989, John Wiley &amp; Sons, New York </FONT>
	<P align="center">
	<HR>
	<FONT face="Arial"> <BR>
	</FONT>Return to the <A HREF="index.html">Interactive Statistics page</A>
	or to the <A HREF="JCPhome.html">JCP Home Page</A><BR>
	</FORM>
	</BODY></HTML>